## Clear the workspace: remove every object from the global environment.
## NOTE(review): this does not detach loaded packages or reset options, so it
## is not a full session reset; consider running the script in a fresh R
## session instead.
rm(list =ls() )

## importamos las librerías
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.4      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)
## Load the Titanic passenger data set (titanic3.csv) from GitHub.
## Empty fields are read as NA and text columns are converted to factors.
path <- "https://raw.githubusercontent.com/lacamposm/Fundamentos_Analitica/main/data/titanic3.csv"
## The file is comma-separated with "." as the decimal mark, which are exactly
## the defaults of read.csv(), so it is used instead of read.csv2() with
## overridden `sep` and `dec`.
df <- read.csv(path, stringsAsFactors = TRUE, na.strings = "")
## Inspect the column types on the first 10 rows.
str(head(df, 10))
## 'data.frame':    10 obs. of  14 variables:
##  $ pclass   : int  1 1 1 1 1 1 1 1 1 1
##  $ survived : int  1 1 0 0 0 1 1 0 1 0
##  $ name     : Factor w/ 1307 levels "Abbing, Mr. Anthony",..: 22 24 25 26 27 31 46 47 51 55
##  $ sex      : Factor w/ 2 levels "female","male": 1 2 1 2 1 2 1 2 1 2
##  $ age      : num  29 0.917 2 30 25 ...
##  $ sibsp    : int  0 1 1 1 1 0 1 0 2 0
##  $ parch    : int  0 2 2 2 2 0 0 0 0 0
##  $ ticket   : Factor w/ 929 levels "110152","110413",..: 188 50 50 50 50 125 93 16 77 826
##  $ fare     : num  211 152 152 152 152 ...
##  $ cabin    : Factor w/ 186 levels "A10","A11","A14",..: 44 80 80 80 80 150 146 16 62 NA
##  $ embarked : Factor w/ 3 levels "C","Q","S": 3 3 3 3 3 3 3 3 3 1
##  $ boat     : Factor w/ 27 levels "1","10","11",..: 12 3 NA NA NA 13 2 NA 27 NA
##  $ body     : int  NA NA NA 135 NA NA NA NA NA 22
##  $ home.dest: Factor w/ 369 levels "?Havana, Cuba",..: 309 231 231 231 231 237 162 24 22 229
## Show the class of the loaded object.
class(df)
## [1] "data.frame"

## Diccionario de datos:
##   pclass     Clase del pasajero (1 = 1°; 2 = 2°; 3 = 3°).
##   survived   Supervivencia (0 = No; 1 = Sí).
##   name       Nombre.
##   sex        Sexo.
##   age        Edad.
##   sibsp      Número de hermanos/cónyuges a bordo (esposa/prome).
##   parch      Número de padres/hijos a bordo (par / hijo).
##   ticket     Número del billete.
##   fare       Tarifa pagada.
##   cabin      Cabina.
##   embarked   Puerto de embarque (C = Cherburgo; Q = Queenstown; S = Southampton).
##   boat       Bote salvavidas.
##   body       Número de identificación del cuerpo.
##   home.dest  Destino.

## Convert the 0/1 survival indicator into a logical vector.

df$survived <- as.logical(df$survived)

## Keep only survivors whose port of embarkation is known, grouped by
## survival status and passenger class.
## - `survived` is already logical, so it is used directly as the condition
##   instead of being compared with the string 'TRUE'.
## - Missing ports are real NA values (the file is read with na.strings = ""),
##   so they are tested with is.na() rather than compared with the string 'NA',
##   which only dropped them as a side effect of NA propagation in filter().
new_df <- df %>%
  filter(survived, !is.na(embarked)) %>%
  group_by(survived, pclass)
str(new_df)
## gropd_df [498 × 14] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ pclass   : int [1:498] 1 1 1 1 1 1 1 1 1 1 ...
##  $ survived : logi [1:498] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ name     : Factor w/ 1307 levels "Abbing, Mr. Anthony",..: 22 24 31 46 51 70 73 93 94 100 ...
##  $ sex      : Factor w/ 2 levels "female","male": 1 2 2 1 1 1 1 1 2 1 ...
##  $ age      : num [1:498] 29 0.917 48 63 53 ...
##  $ sibsp    : int [1:498] 0 1 0 1 2 1 0 0 0 0 ...
##  $ parch    : int [1:498] 0 2 0 0 0 0 0 0 0 1 ...
##  $ ticket   : Factor w/ 929 levels "110152","110413",..: 188 50 125 93 77 834 796 119 297 801 ...
##  $ fare     : num [1:498] 211.3 151.6 26.6 78 51.5 ...
##  $ cabin    : Factor w/ 186 levels "A10","A11","A14",..: 44 80 150 146 62 98 34 NA 9 49 ...
##  $ embarked : Factor w/ 3 levels "C","Q","S": 3 3 3 3 3 1 1 3 3 1 ...
##  $ boat     : Factor w/ 27 levels "1","10","11",..: 12 3 13 2 27 14 22 18 24 18 ...
##  $ body     : int [1:498] NA NA NA NA NA NA NA NA NA NA ...
##  $ home.dest: Factor w/ 369 levels "?Havana, Cuba",..: 309 231 237 162 22 237 258 NA 158 230 ...
##  - attr(*, "groups")= tibble [3 × 3] (S3: tbl_df/tbl/data.frame)
##   ..$ survived: logi [1:3] TRUE TRUE TRUE
##   ..$ pclass  : int [1:3] 1 2 3
##   ..$ .rows   : list<int> [1:3] 
##   .. ..$ : int [1:198] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..$ : int [1:119] 199 200 201 202 203 204 205 206 207 208 ...
##   .. ..$ : int [1:181] 318 319 320 321 322 323 324 325 326 327 ...
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
## Density of passenger class among survivors, one curve per port of
## embarkation (mapped to colour).
## The y axis of geom_density() is the estimated density, so it is labelled as
## such; the port of embarkation is the colour legend and gets that title.
titanic <- ggplot(new_df, aes(x = pclass, color = embarked)) +
  geom_density() +
  labs(
    y = "Densidad",
    x = "Clase del pasajero",
    color = "Puerto de embarque",
    title = "Supervivencia"
  ) +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())

## Render the ggplot as an interactive plotly widget.
ggplotly(titanic)
## Count the rows in each (survived, pclass) group of new_df; the count is
## stored in a column named `embarked`.
new_df2 <- summarize(new_df, embarked = n())
## `summarise()` has grouped output by 'survived'. You can override using the
## `.groups` argument.
## Preview up to 10 rows of the summary.
head(new_df2, n = 10)
## # A tibble: 3 × 3
## # Groups:   survived [1]
##   survived pclass embarked
##   <lgl>     <int>    <int>
## 1 TRUE          1      198
## 2 TRUE          2      119
## 3 TRUE          3      181
## Bar chart of the number of survivors per passenger class.
## new_df2 already holds one pre-computed count per class (column `embarked`),
## so geom_col() draws the values as-is; it is the idiomatic equivalent of
## geom_bar(stat = "identity").
## The y axis shows those counts, so it is labelled as the number of survivors
## rather than as the port of embarkation.
## NOTE: the three fill colours are matched to rows by position, so this
## assumes new_df2 has exactly three rows (one per class).
titanic2 <- ggplot(new_df2, aes(x = pclass, y = embarked)) +
  geom_col(fill = c("white", "green", "blue")) +
  labs(
    y = "Número de sobrevivientes",
    x = "Clase del pasajero",
    title = "Supervivencia"
  ) +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())

## Render the ggplot as an interactive plotly widget.
ggplotly(titanic2)
## Survivors with a known port of embarkation, grouped by port and class.
## `survived` is logical, so it is used directly as the filter condition, and
## missing ports are tested with is.na() instead of comparing with 'NA'.
new_df3 <- df %>%
  filter(survived, !is.na(embarked)) %>%
  group_by(embarked, pclass)

## Stacked bar chart: number of survivors per class, split by port.
## geom_bar() counts rows, so the y axis is a count ("Cantidad"); the port of
## embarkation is the fill legend and is labelled there.
titanic3 <- ggplot(new_df3, aes(x = pclass, fill = embarked)) +
  geom_bar(color = "black") +
  labs(
    y = "Cantidad",
    x = "clase",
    fill = "embarque",
    title = "Supervivencia / clase"
  ) +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())

## Render the ggplot as an interactive plotly widget.
ggplotly(titanic3)
## Survivors with a known port of embarkation, grouped by port, class and sex.
## `survived` is logical, so it is used directly as the filter condition, and
## missing ports are tested with is.na() instead of comparing with 'NA'.
new_df3 <- df %>%
  filter(survived, !is.na(embarked)) %>%
  group_by(embarked, pclass, sex)

## Stacked bar chart: number of survivors per port of embarkation, split by sex.
## geom_bar() computes the bar heights by counting rows, so no y aesthetic is
## mapped. A `Y = pclass` mapping (capital Y is not a ggplot2 aesthetic) is
## dropped during the statistical transformation and only triggers an
## "aesthetics were dropped" warning, so it is not included.
titanic4 <- ggplot(new_df3, aes(x = embarked, fill = sex)) +
  geom_bar(color = "black") +
  labs(y = "Cantidad", x = "Embarque", title = "Supervivencia / Genero") +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())

## Render the ggplot as an interactive plotly widget.
ggplotly(titanic4)

## Puerto de embarque: C = Cherburgo; Q = Queenstown; S = Southampton.